For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0     1     0     0     0    43     1     0     0     0    28     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ba 
##     0     1     0     0    27     0     0     2     0    31    45

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Ba)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3836 1.2233 1.0086 0.84957 0.60483 0.49903 0.44478
## Proportion of Variance 0.5719 0.1506 0.1024 0.07265 0.03682 0.02507 0.01991
## Cumulative Proportion  0.5719 0.7225 0.8249 0.89752 0.93434 0.95941 0.97932
##                           PC8     PC9    PC10
## Standard deviation     0.3538 0.22250 0.17534
## Proportion of Variance 0.0126 0.00498 0.00309
## Cumulative Proportion  0.9919 0.99691 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with sample types")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:52          Min.   :8.079   Min.   :10.13   Min.   :1.538  
##  Class :character   1st Qu.:8.566   1st Qu.:11.00   1st Qu.:2.464  
##  Mode  :character   Median :9.006   Median :11.71   Median :2.635  
##                     Mean   :8.994   Mean   :11.55   Mean   :2.559  
##                     3rd Qu.:9.435   3rd Qu.:12.05   3rd Qu.:2.824  
##                     Max.   :9.925   Max.   :12.68   Max.   :3.043  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.11   Min.   :10.08   Min.   :10.05   Length:52         
##  1st Qu.:10.97   1st Qu.:10.94   1st Qu.:10.86   Class :character  
##  Median :11.67   Median :11.62   Median :11.53   Mode  :character  
##  Mean   :11.52   Mean   :11.47   Mean   :11.41                     
##  3rd Qu.:12.00   3rd Qu.:11.96   3rd Qu.:11.93                     
##  Max.   :12.65   Max.   :12.60   Max.   :12.52                     
##      type          
##  Length:52         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:53         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
## 
## $n
## [1] 52
## 
## $conf
## [1] 1.840428 2.556598
## 
## $out
## [1]  8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AY-1"    "AY-2"    "AY-3"    "AY-4"    "AY-5"    "AY-6"    "AY-7"   
##  [8] "AY-8"    "AY-9"    "AY-10"   "AY-11"   "AY-12"   "AY-13"   "AY-14"  
## [15] "AY-15"   "AY-16"   "AY-17"   "AY-18"   "AY-19"   "AY-20"   "AY-21"  
## [22] "AY-22"   "AY-23"   "AY-24"   "AY-25"   "AY-26"   "AY-27"   "AY-28"  
## [29] "AY-29"   "AY-30"   "AY-31"   "AY-32"   "AY-33"   "AY-34"   "AY-35"  
## [36] "AY-36"   "AY-37"   "AY-38"   "AY-39"   "AY-40"   "AY-41"   "AY-42"  
## [43] "AY-50"   "AY-51"   "AY-52"   "AY-53"   "AY-54"   "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
loi <- subset(loi[1:47, ])

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.5855926 2.3958197 3.4621816 5.4590326
## 
## $n
## [1] 50
## 
## $conf
## [1] 1.976504 2.815135
## 
## $out
## [1] 7.994718
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti    Mn    Fe 
##     0     0     1     0     0     0     1     0     2     0     0     0     0 
##    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Pb 
##     0     0     0     1     1     0     2     0     3

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Pb) 

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6      PC7
## Standard deviation     2.3569 1.4536 0.9800 0.72263 0.38973 0.17504 1.58e-16
## Proportion of Variance 0.5952 0.2264 0.1029 0.05595 0.01627 0.00328 0.00e+00
## Cumulative Proportion  0.5952 0.8216 0.9245 0.98044 0.99672 1.00000 1.00e+00
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Byzantine elements")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Byzantine grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=5)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with sample types")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:7           Min.   :8.382   Min.   :10.92   Min.   :2.257  
##  Class :character   1st Qu.:8.555   1st Qu.:11.39   1st Qu.:2.613  
##  Mode  :character   Median :8.666   Median :11.50   Median :2.880  
##                     Mean   :8.943   Mean   :11.68   Mean   :2.739  
##                     3rd Qu.:9.442   3rd Qu.:12.13   3rd Qu.:2.905  
##                     Max.   :9.560   Max.   :12.31   Max.   :3.002  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.83   Min.   :10.62   Min.   :10.50   Length:7          
##  1st Qu.:11.33   1st Qu.:11.25   1st Qu.:11.04   Class :character  
##  Median :11.45   Median :11.39   Median :11.21   Mode  :character  
##  Mean   :11.63   Mean   :11.55   Mean   :11.24                     
##  3rd Qu.:12.11   3rd Qu.:12.05   3rd Qu.:11.43                     
##  Max.   :12.26   Max.   :12.24   Max.   :12.06                     
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date      Batch               Wet           
##  Length:7           Min.   :44536   Length:7           Length:7          
##  Class :character   1st Qu.:44536   Class :character   Class :character  
##  Mode  :character   Median :44536   Mode  :character   Mode  :character  
##                     Mean   :44536                                        
##                     3rd Qu.:44536                                        
##                     Max.   :44536                                        
##      Dry              Mass_550           Mass_950           context         
##  Length:7           Length:7           Length:7           Length:7          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# Color by type      
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti    Mn    Fe 
##     0     0     0     0     1     0     3     0     8     0     0     0     0 
##    Ni    Cu    Zn    Rb    Sr     Y    Zr    Nb 
##    11     0     0     6     0    16     0    17

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-Nb) 

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)

pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
##  [1] "AH-4"  "AH-5"  "RLZ-1" "TI-1"  "TI-10" "TI-2"  "TI-3"  "TI-4"  "TI-5" 
## [10] "TI-6"  "TI-7"  "TI-8"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0850 1.3922 1.0863 0.70334 0.64347 0.44563 0.31268
## Proportion of Variance 0.4953 0.2208 0.1344 0.05636 0.04717 0.02262 0.01114
## Cumulative Proportion  0.4953 0.7161 0.8505 0.90687 0.95404 0.97666 0.98780
##                            PC8     PC9    PC10
## Standard deviation     0.24332 0.18520 0.11656
## Proportion of Variance 0.00674 0.00391 0.00155
## Cumulative Proportion  0.99454 0.99845 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_2 <- prcomp(pxrf_MB[3:12])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3606 1.6226 0.7143 0.52086 0.42466 0.34190 0.21405
## Proportion of Variance 0.5941 0.2807 0.0544 0.02892 0.01923 0.01246 0.00488
## Cumulative Proportion  0.5941 0.8748 0.9292 0.95809 0.97732 0.98978 0.99467
##                            PC8     PC9    PC10
## Standard deviation     0.18778 0.10943 0.05281
## Proportion of Variance 0.00376 0.00128 0.00030
## Cumulative Proportion  0.99843 0.99970 1.00000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_2, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

PCA(pxrf_MB[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:19          Min.   :7.921   Min.   :10.55   Min.   :2.081  
##  Class :character   1st Qu.:8.557   1st Qu.:11.23   1st Qu.:2.403  
##  Mode  :character   Median :9.278   Median :11.78   Median :2.584  
##                     Mean   :9.017   Mean   :11.63   Mean   :2.611  
##                     3rd Qu.:9.452   3rd Qu.:11.92   3rd Qu.:2.828  
##                     Max.   :9.787   Max.   :12.62   Max.   :3.231  
##    dry_weight     after_550_C     after_950_C       context         
##  Min.   :10.52   Min.   :10.42   Min.   : 9.773   Length:19         
##  1st Qu.:11.23   1st Qu.:11.18   1st Qu.:10.376   Class :character  
##  Median :11.76   Median :11.70   Median :10.971   Mode  :character  
##  Mean   :11.60   Mean   :11.53   Mean   :10.884                     
##  3rd Qu.:11.88   3rd Qu.:11.78   3rd Qu.:11.322                     
##  Max.   :12.56   Max.   :12.49   Max.   :12.371                     
##      type          
##  Length:19         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Color by context
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0    34    34    34    35    35    34    36    34     0    41     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Rh    Ag 
##     0     0     0    34    48    35    34    36     0    51    49

pxrf_final: Final analysis data set with only the selected elements, all samples (soil and published included)

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Rh) %>% 
        select(-Ag)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pxrf_MB: Final analysis data set for NEW MUDBRICK samples only (= no soil or published samples)

pxrf_MB <- pxrf_final[c(1:11), ]
rownames(pxrf_MB)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18"

pxrf_soil: Final analysis data set for NEW samples only (= no published samples, yes soil)

pxrf_soil <- pxrf_final[-c(12:45), ]
rownames(pxrf_soil)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "PP-1"  "PP-2"  "PP-3"  "PP-6"  "PP-4"  "PP-7"  "PP-5" 
## [19] "PP-8"

pxrf_MB2: Final analysis data set for ALL MUDBRICK samples (= no soil)

pxrf_MB2 <- pxrf_final[c(1:45), ]
rownames(pxrf_MB2)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "1"     "10"    "11"    "12"    "13"    "14"    "15"   
## [19] "16"    "17"    "18"    "19"    "2"     "20"    "21"    "22"    "23"   
## [28] "24"    "25"    "26"    "27"    "28"    "29"    "3"     "30"    "31"   
## [37] "32"    "33"    "34"    "4"     "5"     "6"     "7"     "8"     "9"

pXRF: K means cluster analysis

Only the “new” mudbrick samples

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:12], 3), data = pxrf_MB, label = TRUE, label.size = 3)

pXRF: PCA

PCA with only new mudbrick samples:

# Elements
colnames(pxrf_MB[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_MB[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.5967 1.0886 0.99328 0.85704 0.40479 0.33892 0.20666
## Proportion of Variance 0.6735 0.1184 0.09854 0.07336 0.01637 0.01147 0.00427
## Cumulative Proportion  0.6735 0.7918 0.89037 0.96373 0.98010 0.99157 0.99584
##                            PC8     PC9    PC10
## Standard deviation     0.17917 0.09521 0.02255
## Proportion of Variance 0.00321 0.00091 0.00005
## Cumulative Proportion  0.99904 0.99995 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with soil samples:

# Elements
colnames(pxrf_soil[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_2 <- prcomp(pxrf_soil[3:12])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0533 1.6050 1.0187 0.82293 0.69376 0.39645 0.33871
## Proportion of Variance 0.4498 0.2748 0.1107 0.07225 0.05135 0.01677 0.01224
## Cumulative Proportion  0.4498 0.7247 0.8354 0.90763 0.95898 0.97575 0.98799
##                            PC8     PC9   PC10
## Standard deviation     0.24806 0.21529 0.0685
## Proportion of Variance 0.00657 0.00495 0.0005
## Cumulative Proportion  0.99455 0.99950 1.0000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_2, data=pxrf_soil, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_2, data=pxrf_soil, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA(pxrf_soil[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the mudbrick samples

Including the previously published ones, no soil

# Elements
colnames(pxrf_MB2[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_3 <- prcomp(pxrf_MB2[3:12])
summary(pca_3)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.8473 1.1712 0.76106 0.60306 0.44231 0.40557 0.30364
## Proportion of Variance 0.5429 0.2182 0.09215 0.05786 0.03112 0.02617 0.01467
## Cumulative Proportion  0.5429 0.7611 0.85326 0.91112 0.94224 0.96841 0.98307
##                            PC8     PC9    PC10
## Standard deviation     0.22882 0.19928 0.11965
## Proportion of Variance 0.00833 0.00632 0.00228
## Cumulative Proportion  0.99140 0.99772 1.00000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_3, data=pxrf_MB2, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB2[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 45 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

HCA including only new samples:

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_soil %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

type <- as.factor(pxrf_soil[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with new samples by type")
legend("topright", legend = levels(type), fill = cols_t)

HCA including all mudbrick samples :

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB2 %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB2[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet;weight       sample;wet   
##  Length:27          Min.   :8.100   Min.   : 9.637   Min.   :1.537  
##  Class :character   1st Qu.:8.639   1st Qu.:10.859   1st Qu.:2.193  
##  Mode  :character   Median :8.997   Median :11.184   Median :2.371  
##                     Mean   :8.988   Mean   :11.274   Mean   :2.286  
##                     3rd Qu.:9.454   3rd Qu.:11.844   3rd Qu.:2.453  
##                     Max.   :9.560   Max.   :12.301   Max.   :2.785  
##    dry_weight      after_550_C      after_950_C      context         
##  Min.   : 9.595   Min.   : 9.484   Min.   : 9.23   Length:27         
##  1st Qu.:10.813   1st Qu.:10.721   1st Qu.:10.27   Class :character  
##  Median :11.132   Median :11.019   Median :10.59   Mode  :character  
##  Mean   :11.216   Mean   :11.113   Mean   :10.67                     
##  3rd Qu.:11.777   3rd Qu.:11.681   3rd Qu.:11.18                     
##  Max.   :12.218   Max.   :12.076   Max.   :11.56                     
##      type          
##  Length:27         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:30         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Removing duplicates from the next graph and creating MB only subset
loi <- subset(loi[1:19, ])
loi_MB <- subset(loi[9:19, ])
rownames(loi)
##  [1] "PP-1"  "PP-2"  "PP-3"  "PP-4"  "PP-5"  "PP-6"  "PP-7"  "PP-8"  "PP-9" 
## [10] "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17" "PP-18"
## [19] "PP-19"
rownames(loi_MB)
##  [1] "PP-9"  "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi_MB)

boxplot(c950~context, data=loi_MB)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# MB only
ggplot(loi_MB, 
      aes(c550, c950, label = rownames(loi_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "MB only organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi_MB, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Removing duplicates and creating MB only subset
tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ])
rownames(tga_MB)
##  [1] "PP-9"  "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga_MB)

boxplot(c950~context, data=tga_MB)

# Color by context
ggplot(tga_MB, 
      aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga_MB, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr 
##     0     0     0     0     0     1     0     0     0     0     0     5     8 
##    Mn    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ba 
##     0     0     0     0     0     1     0     0     0     0     4     3

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Cr)  %>% 
        select(-Ba)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.6820 1.1972 0.84041 0.53728 0.45231 0.32524 0.2122
## Proportion of Variance 0.7193 0.1433 0.07063 0.02887 0.02046 0.01058 0.0045
## Cumulative Proportion  0.7193 0.8627 0.93328 0.96215 0.98261 0.99318 0.9977
##                            PC8     PC9 PC10
## Standard deviation     0.14003 0.05947    0
## Proportion of Variance 0.00196 0.00035    0
## Cumulative Proportion  0.99965 1.00000    1
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Artaxata grouped by area")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:10          Min.   :8.066   Min.   :10.70   Min.   :1.923  
##  Class :character   1st Qu.:8.461   1st Qu.:10.93   1st Qu.:2.326  
##  Mode  :character   Median :8.995   Median :11.48   Median :2.558  
##                     Mean   :8.892   Mean   :11.39   Mean   :2.496  
##                     3rd Qu.:9.174   3rd Qu.:11.75   3rd Qu.:2.701  
##                     Max.   :9.890   Max.   :12.18   Max.   :2.842  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.58   Min.   :10.47   Min.   :10.31   Length:10         
##  1st Qu.:10.85   1st Qu.:10.78   1st Qu.:10.60   Class :character  
##  Median :11.43   Median :11.33   Median :11.16   Mode  :character  
##  Mean   :11.32   Mean   :11.22   Mean   :11.07                     
##  3rd Qu.:11.69   3rd Qu.:11.58   3rd Qu.:11.42                     
##  Max.   :12.13   Max.   :12.06   Max.   :11.94                     
##      type          
##  Length:10         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Color by context
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)